--- title: Caseload Modelling Introduction keywords: fastai sidebar: home_sidebar nb_path: "20. Caseload Modelling Introduction.ipynb" ---
import pandas as pd
import numpy as np
from datetime import datetime
import altair as alt
from vega_datasets import data
_=alt.data_transformers.disable_max_rows()
#year_portion_limit = 0.8
debug = 1
CESSATION_CONTINUATION = {
'Driver Deceased': 'Deceased',
'Drivers found medically unfit to drive':'Cessation',
'Drivers that did not respond; cancelled license':'Cessation',
'Drivers that voluntarily surrendered their license':'Cessation',
'Drivers ultimately found fit to drive':'Continuation',
'Cases remaining open at time of reporting':'Continuation'
}
data_file_path = r'C:\Users\mbeaulieu\rsi_project_book\Data\\'
generated_file_path = r'C:\Users\mbeaulieu\rsi_project_book\Data\Generated\\'
# caseload_data_by_month['Opened Month'] = caseload_data_by_month.apply(lambda x: x['CASE_OPENED_DT'].strftime('%b') + '-' + x['CASE_OPENED_DT'].strftime('%Y'), axis=1)
#hideinput
def process_file(file_name):
#'BIRTHDATE, LAST_STATUS_DATE, PREV_CASE_END_DT'
data = pd.read_csv(file_name, parse_dates=['CASE_OPENED_DT'], dtype={'DRIVERS_LICENSE_NO': str})
return data
data2020_fpath = data_file_path + 'generated\Sep_2020_toNov2021_ADJUDICATEDCASELOAD.csv'
Sept2020_exploded_df = process_file(data2020_fpath)
#caseload_data_by_month
# %run ./helpers.py
# r = derive_statuscount_percase(cases2018_df, ftedays_df)
# monthly_summary_statuscounts = r[0]
aggregation = {
# 'Case Count': ('DRIVERS_LICENSE_NO','nunique'),
'Open Count': ('Open Count','sum'),
'Closed Count': ('Closed Count','sum'),
'Status Count': ('STATUS_COUNT','sum'),
}
# Sept2020_monthly_counts = Sept2020_exploded_df.groupby([pd.Grouper(freq='M', key='CASE_OPENED_DT') ,
# pd.Grouper(key='Year Span'),
# pd.Grouper(key='CASE_DSC'),
# pd.Grouper(key='Status'),
# pd.Grouper(key='DRIVERS_LICENSE_NO'),
# ]).agg(** aggregation)
# Sept2020_monthly_counts = pd.DataFrame(Sept2020_monthly_counts).reset_index()
Adjudicated_Sept2020_exploded_df = Sept2020_exploded_df[Sept2020_exploded_df['Is Adjudicated'] == 'Adjudicated']
Adjudicated_Sept2020_monthly_counts = Adjudicated_Sept2020_exploded_df.groupby([pd.Grouper(freq='M', key='CASE_OPENED_DT') ,
pd.Grouper(key='Year Span'),
pd.Grouper(key='CASE_DSC'),
pd.Grouper(key='Status'),
pd.Grouper(key='DRIVERS_LICENSE_NO'),
]).agg(** aggregation)
Adjudicated_Sept2020_monthly_counts = pd.DataFrame(Adjudicated_Sept2020_monthly_counts).reset_index()
# aggregation = {
# 'Case Count': ('DRIVERS_LICENSE_NO','nunique'),
# 'Open Count': ('Open Count','sum'),
# 'Status Count': ('STATUS_COUNT','sum'),
# }
# Adjudicated_Sept2020_exploded_df = Sept2020_exploded_df[Sept2020_exploded_df['Is Adjudicated'] == 'Adjudicated']
# Adjudicated_Sept2020_monthly_counts = Adjudicated_Sept2020_exploded_df.groupby([ pd.Grouper(key='Year Span'),
# pd.Grouper(key='Status'),
# pd.Grouper(key='CASE_DSC'),
# ]).agg(** aggregation)
# Adjudicated_Sept2020_monthly_counts = Adjudicated_Sept2020_monthly_counts.reset_index()
# Adjudicated_Sept2020_monthly_counts
montly_backlog_all = alt.Chart(Adjudicated_Sept2020_monthly_counts).mark_area(point=True).encode(
y = alt.Y("sum(Open Count):Q" ),
x = alt.X("Year Span:T", scale=alt.Scale(zero=False) ),
color = "CASE_DSC:N",
tooltip=['sum(Open Count)', 'sum(Status Count)'],
#column='Status'
).properties(
width=600,
height=200
)
montly_backlog_all
montly_backlog_all = alt.Chart(Adjudicated_Sept2020_monthly_counts).mark_area(point=True).encode(
y = alt.Y("sum(Closed Count):Q" ),
x = alt.X("Year Span:T", scale=alt.Scale(zero=False) ),
color = "CASE_DSC:N",
tooltip=['sum(Closed Count)', 'sum(Status Count)'],
#column='Status'
).properties(
width=600,
height=200
)
montly_backlog_all